In [1]:

    
%run "../0. config.ipynb"

data preparation



In [2]:

    
# Distinct values of the "type" column of the raw logs, as a plain Python list.
list_types = list(df_raw.type.unique())

counting the number of events of each type per player (~ game)



In [3]:

    
def compute_players_type_events(logs):
  """Count how many events of each type every player produced.

  Parameters
  ----------
  logs : pandas.DataFrame
    Event log holding at least a "playerId" and a "type" column, one row
    per event.

  Returns
  -------
  pandas.DataFrame
    One row per playerId (index named "playerId") and one column per event
    type (column axis named "type"). A (player, type) pair that never occurs
    in the logs holds 0.
  """
  # size() counts the rows of every (playerId, type) pair directly. It replaces
  # the helper "count" column that was summed with np.sum: handing np.sum to
  # agg() raises a FutureWarning on recent pandas releases.
  counts = logs.groupby(["playerId", "type"]).size()

  # Pivot the event types into columns. Pairs absent from the logs come out of
  # unstack() as NaN and are turned into zero counts.
  return counts.unstack().fillna(0)

compute_players_type_events(df_raw).head()









    Out[3]:






  
    
      type
      complete
      configure
      craft
      death
      equip
      gotomooc
      gotourl
      pickup
      reach
      restart
      selectmenu
      start
      switch
      unequip
    
    
      playerId
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0023dbb1-7f98-4cdb-8122-722f801f40b3
      0.0
      1.0
      0.0
      3.0
      0.0
      0.0
      0.0
      1.0
      2.0
      0.0
      1.0
      0.0
      0.0
      0.0
    
    
      01b0c435-f0c0-4bfd-9189-86fc0d29b163
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
    
    
      02bc076b-32aa-467a-bbc6-b746abedb7bd
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
    
    
      02c6953a-0417-4858-8efb-1989be9f6b9d
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      2.0
      1.0
      1.0
      0.0
    
    
      0306db66-081d-4035-b30f-8358469d6ec3
      1.0
      0.0
      3.0
      17.0
      8.0
      0.0
      1.0
      5.0
      12.0
      1.0
      2.0
      0.0
      0.0
      2.0

compute game durations



In [4]:

    
def compute_game_durations(logs):
  """Compute, per player, the time elapsed between first and last event.

  Parameters
  ----------
  logs : pandas.DataFrame
    Event log holding at least a "playerId" column and a "serverTime" column
    whose values pd.to_datetime can parse.

  Returns
  -------
  pandas.DataFrame
    Indexed by playerId, with a single float column "duration (seconds)":
    the span between the earliest and the latest serverTime of the player,
    truncated to whole seconds.
  """
  # Parse the timestamps before aggregating so that min/max compare instants
  # rather than raw (possibly textual) values.
  times = logs[["playerId"]].assign(serverTime=pd.to_datetime(logs["serverTime"]))

  # String aggregation names give stable "min"/"max" column labels. Labels
  # derived from np.min/np.max follow the callables' __name__ ("amin"/"amax"
  # or "min"/"max"), which is not stable across NumPy versions.
  bounds = times.groupby("playerId")["serverTime"].agg(["min", "max"])
  elapsed = bounds["max"] - bounds["min"]

  # total_seconds() does not depend on the storage unit of the timedelta
  # (unlike casting to int and dividing by 1e9). floor() keeps the
  # whole-second resolution this function has always reported.
  seconds = np.floor(elapsed.dt.total_seconds())

  return seconds.to_frame("duration (seconds)")

compute_game_durations(df_raw).head()









    Out[4]:






  
    
      
      duration (seconds)
    
    
      playerId
      
    
  
  
    
      0023dbb1-7f98-4cdb-8122-722f801f40b3
      175.0
    
    
      01b0c435-f0c0-4bfd-9189-86fc0d29b163
      0.0
    
    
      02bc076b-32aa-467a-bbc6-b746abedb7bd
      0.0
    
    
      02c6953a-0417-4858-8efb-1989be9f6b9d
      19.0
    
    
      0306db66-081d-4035-b30f-8358469d6ec3
      1030.0

compute the highest checkpoint reached by each player, and whether they played adventure or sandbox sections



In [5]:

    
def max_reach(x):
  """Return the highest checkpoint number found in a collection of sections.

  The checkpoint number is read from the last two characters of every section
  value. Float entries (which is how missing values, NaN, show up) are
  skipped. Returns 0 when no usable section is present.
  """
  # NaN can only be filtered through its type: a comparison such as
  # `value != np.nan` is true for every value, NaN included.
  checkpoints = [ int(section[-2:]) for section in x if not isinstance(section, float) ]

  return max(checkpoints) if checkpoints else 0

def adventure(x):
  """Tell whether any section in x belongs to "adventure1".

  A section belongs to "adventure1" when the text before its first "." is
  exactly "adventure1". Float entries (NaN, i.e. missing sections) are skipped.
  """
  # The type check is what discards NaN; comparing a value with np.nan using
  # != is always true and therefore filters nothing.
  return any(w.split(".")[0] == "adventure1" for w in x if not isinstance(w, float))

def sandbox(x):
  """Tell whether any section in x belongs to "sandbox1" or "sandbox2".

  The part of a section before its first "." is compared with both names.
  Float entries (NaN, i.e. missing sections) are skipped.
  """
  # The type check is what discards NaN; comparing a value with np.nan using
  # != is always true and therefore filters nothing.
  return any(w.split(".")[0] in ("sandbox1", "sandbox2") for w in x if not isinstance(w, float))

def compute_max_reachpoint(logs):
  """Summarise, per player, the sections found in the logs.

  Returns a DataFrame indexed by playerId with one column per aggregation
  applied to the "section" column: max_reach, adventure and sandbox.
  """
  per_player = logs.loc[:, ["playerId", "type", "section"]].groupby("playerId")
  summary = per_player.agg({ "section": [ max_reach, adventure, sandbox ] })

  # The aggregation yields two-level columns ("section", <function name>);
  # only the function-name level is kept.
  summary.columns = summary.columns.droplevel()

  return summary

compute_max_reachpoint(df_raw).head()









    Out[5]:






  
    
      
      max_reach
      adventure
      sandbox
    
    
      playerId
      
      
      
    
  
  
    
      0023dbb1-7f98-4cdb-8122-722f801f40b3
      2
      True
      False
    
    
      01b0c435-f0c0-4bfd-9189-86fc0d29b163
      0
      False
      False
    
    
      02bc076b-32aa-467a-bbc6-b746abedb7bd
      0
      False
      False
    
    
      02c6953a-0417-4858-8efb-1989be9f6b9d
      1
      False
      True
    
    
      0306db66-081d-4035-b30f-8358469d6ec3
      8
      True
      False

compile the sessionId of each player (from customData.localplayerguid)



In [6]:

    
def compile_sessionid(raw):
  """Extract one session id per player from "customData.localplayerguid".

  Parameters
  ----------
  raw : pandas.DataFrame
    Event log holding at least the "playerId" and
    "customData.localplayerguid" columns.

  Returns
  -------
  pandas.DataFrame
    Indexed by playerId, with a single "sessionId" column: the first
    non-missing guid logged for the player with its double quotes removed,
    or None when the player has no guid at all.
  """
  def sessionid(x):
    # Missing values are NaN, i.e. floats; a comparison with np.nan using !=
    # is always true and cannot be used to detect them.
    for guid in x:
      if not isinstance(guid, float):
        # Taking the first guid in log order keeps the result deterministic
        # when a player has several distinct values (picking an element out
        # of a set is order-dependent).
        return guid.replace("\"", "")

    return None

  df = raw.loc[:, ["playerId", "customData.localplayerguid"]].groupby("playerId").agg({ "customData.localplayerguid" : sessionid })

  return df.rename(columns={ "customData.localplayerguid": "sessionId" })

compile_sessionid(df_raw[0:100]).head()









    Out[6]:






  
    
      
      sessionId
    
    
      playerId
      
    
  
  
    
      3607b3ec-9e2a-4043-9f07-8dbccee66cb3
      None
    
    
      4acee9a7-7295-44c6-85ec-b247a3b483b1
      4a9fded1-7ab7-48f5-b267-2fac6627ea5a
    
    
      5af5f837-2aa1-4d5e-86a3-05d39d6cf63d
      4a9fded1-7ab7-48f5-b267-2fac6627ea5a
    
    
      85de8fbc-b510-47c6-bfcd-ce829712c379
      8c83d8d2-f63a-49b4-b0da-2712080fc4d1
    
    
      9c183209-8867-46bb-9d29-9dda57e5a7bd
      6e665720-b387-423f-8dab-1efade2f0c63

aggregate all data into one data frame



In [7]:

    
# Combine every per-player table on the shared playerId index. Each merge uses
# the default join type, so only players present in all tables are kept.
players_stats = (
  compute_players_type_events(df_raw)
  .merge(compute_game_durations(df_raw), left_index=True, right_index=True)
  .merge(compute_max_reachpoint(df_raw), left_index=True, right_index=True)
  .merge(compile_sessionid(df_raw), left_index=True, right_index=True)
)



In [8]:

    
# Number of players flagged neither as adventure nor as sandbox.
len(players_stats[ (players_stats["adventure"] == False) & (players_stats["sandbox"] == False) ])









    Out[8]:





62



In [9]:

    
# Number of players flagged both as adventure and as sandbox.
len(players_stats[ (players_stats["adventure"] == True) & (players_stats["sandbox"] == True) ])









    Out[9]:





15



In [10]:

    
# Number of players flagged as sandbox, whatever their adventure flag.
len(players_stats[ (players_stats["sandbox"] == True) ])









    Out[10]:





20

preview



In [11]:

    
players_stats.head()









    Out[11]:






  
    
      type
      complete
      configure
      craft
      death
      equip
      gotomooc
      gotourl
      pickup
      reach
      restart
      selectmenu
      start
      switch
      unequip
      duration (seconds)
      max_reach
      adventure
      sandbox
      sessionId
    
    
      playerId
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      0023dbb1-7f98-4cdb-8122-722f801f40b3
      0.0
      1.0
      0.0
      3.0
      0.0
      0.0
      0.0
      1.0
      2.0
      0.0
      1.0
      0.0
      0.0
      0.0
      175.0
      2
      True
      False
      None
    
    
      01b0c435-f0c0-4bfd-9189-86fc0d29b163
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0
      False
      False
      e3e1604c-b94b-4669-abc9-01ae57e9b691
    
    
      02bc076b-32aa-467a-bbc6-b746abedb7bd
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0
      False
      False
      32506a02-7174-46f2-89e2-8ffd1089334b
    
    
      02c6953a-0417-4858-8efb-1989be9f6b9d
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      2.0
      1.0
      1.0
      0.0
      19.0
      1
      False
      True
      3e4c5d28-c240-4024-b024-9f6c37375b77
    
    
      0306db66-081d-4035-b30f-8358469d6ec3
      1.0
      0.0
      3.0
      17.0
      8.0
      0.0
      1.0
      5.0
      12.0
      1.0
      2.0
      0.0
      0.0
      2.0
      1030.0
      8
      True
      False
      None



In [13]:

    
# Write the aggregated per-player table to a UTF-8 CSV file; the relative path
# resolves against the current working directory.
players_stats.to_csv("players_stats.csv", encoding="utf-8")

type	complete	configure	craft	death	equip	gotomooc	gotourl	pickup	reach	restart	selectmenu	start	switch	unequip
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3	0.0	1.0	0.0	3.0	0.0	0.0	0.0	1.0	2.0	0.0	1.0	0.0	0.0	0.0
01b0c435-f0c0-4bfd-9189-86fc0d29b163	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0
02bc076b-32aa-467a-bbc6-b746abedb7bd	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	0.0
02c6953a-0417-4858-8efb-1989be9f6b9d	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	2.0	1.0	1.0	0.0
0306db66-081d-4035-b30f-8358469d6ec3	1.0	0.0	3.0	17.0	8.0	0.0	1.0	5.0	12.0	1.0	2.0	0.0	0.0	2.0

	duration (seconds)
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3	175.0
01b0c435-f0c0-4bfd-9189-86fc0d29b163	0.0
02bc076b-32aa-467a-bbc6-b746abedb7bd	0.0
02c6953a-0417-4858-8efb-1989be9f6b9d	19.0
0306db66-081d-4035-b30f-8358469d6ec3	1030.0

	max_reach	adventure	sandbox
playerId
0023dbb1-7f98-4cdb-8122-722f801f40b3	2	True	False
01b0c435-f0c0-4bfd-9189-86fc0d29b163	0	False	False
02bc076b-32aa-467a-bbc6-b746abedb7bd	0	False	False
02c6953a-0417-4858-8efb-1989be9f6b9d	1	False	True
0306db66-081d-4035-b30f-8358469d6ec3	8	True	False

	sessionId
playerId
3607b3ec-9e2a-4043-9f07-8dbccee66cb3	None
4acee9a7-7295-44c6-85ec-b247a3b483b1	4a9fded1-7ab7-48f5-b267-2fac6627ea5a
5af5f837-2aa1-4d5e-86a3-05d39d6cf63d	4a9fded1-7ab7-48f5-b267-2fac6627ea5a
85de8fbc-b510-47c6-bfcd-ce829712c379	8c83d8d2-f63a-49b4-b0da-2712080fc4d1
9c183209-8867-46bb-9d29-9dda57e5a7bd	6e665720-b387-423f-8dab-1efade2f0c63